# 
#  model1.r
#  
# This file replicates results for the main model with normal random effects 
# It: (1) prepares the data (centering, z-std.), (2) runs JAGS, 
# (3) calculates posterior summaries and quantities of interest
# 
# Note: the corresponding JAGS model file is "model1.bug"
# 



rm(list=ls())

# Load Libs
library(foreign)
library(rjags)
library(coda)
# Load helper functions
source("functions.R")



# =========================================================================
# =                         Data                                  		  =
# =========================================================================

dat <- read.dta("data1samp.dta", convert.factors = FALSE)

## partental background dummies
dat$zg1 <- ifelse(dat$zgold==1, 1, 0)
dat$zg2 <- ifelse(dat$zgold==2, 1, 0)
dat$zg3 <- ifelse(dat$zgold==3, 1, 0)
dat$zg4 <- ifelse(dat$zgold==4, 1, 0)
## standardize continuous vars
dat$age <- zstd(dat$age, gelman=TRUE)
dat$nkids <- zstd(dat$nkids, gelman=TRUE)
dat$hhsize <- zstd(dat$hhsize, gelman=TRUE)
dat$ownocc <- zstd(dat$ownocc, gelman=TRUE)
dat$hsequi <- zstd(dat$hsequi, gelman=TRUE)
dat$incshr <- zstd(dat$incshr, gelman=TRUE)
dat$perminc <- zstd(dat$perminc, gelman=TRUE)
dat$transinc <- zstd(dat$transinc, gelman=TRUE)
## center dummy vars
dat$div <- center(dat$div)
dat$uempl <- center(dat$uempl)
dat$union <- center(dat$union)
dat$fem <- center(dat$fem)
dat$nonwhite <- center(dat$nonwhite)
dat$degree <- center(dat$degree)
dat$alvl <- center(dat$alvl)
dat$olvl <- center(dat$olvl)
dat$zg1 <- center(dat$zg1)
dat$zg2 <- center(dat$zg2)
dat$zg3 <- center(dat$zg3)
dat$zg4 <- center(dat$zg4)
dat$london <- center(dat$london)

## reshape data:
## time varying variables
y <- as.matrix(reshape(subset(dat, select=c(y,pid,waven)), 
	timevar="waven", idvar="pid", direction="wide")[,-1])
age <- as.matrix(reshape(subset(dat, select=c(age,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
nkids <- as.matrix(reshape(subset(dat, select=c(nkids,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hhsize <- as.matrix(reshape(subset(dat, select=c(hhsize,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
ownocc <- as.matrix(reshape(subset(dat, select=c(ownocc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hsequi <- as.matrix(reshape(subset(dat, select=c(hsequi,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
incshr <- as.matrix(reshape(subset(dat, select=c(incshr,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
perminc <- as.matrix(reshape(subset(dat, select=c(perminc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
transinc <- as.matrix(reshape(subset(dat, select=c(transinc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
div <- as.matrix(reshape(subset(dat, select=c(div,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
uempl <- as.matrix(reshape(subset(dat, select=c(uempl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
union <- as.matrix(reshape(subset(dat, select=c(union,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])

# Time constant variables
fem <- reshape(subset(dat, select=c(female,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
nw <- reshape(subset(dat, select=c(nonwhite,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
degree <- reshape(subset(dat, select=c(degree,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
alvl <- reshape(subset(dat, select=c(alvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
olvl <- reshape(subset(dat, select=c(olvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg1 <- reshape(subset(dat, select=c(zg1,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg2 <- reshape(subset(dat, select=c(zg2,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg3 <- reshape(subset(dat, select=c(zg3,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg4 <- reshape(subset(dat, select=c(zg4,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
lond <- reshape(subset(dat, select=c(london,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]


N <- dim(y)[1]


jags.data <- gen.list(c("y", "age", "nkids", "hhsize", "ownocc", "hsequi", "incshr", "perminc", "transinc", "div", "uempl", "fem", "nw", "degree", "alvl", "olvl", "zg1", "zg2", "zg3", "zg4", "union", "lond", "N"))



# ====================================================================
# =                    get MCMC samples                              =
# ====================================================================

load.module("glm")

# cutpoint initial values
z <- jags.data$y
z[z==0] <- -0.5
z[z==1] <- 0.5
z[z==2] <- 1.5
## coefficient initial values
temp1 <- subset(dat, dat$waven==1)
temp2 <- subset(dat, dat$waven>1)
coefb <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union, data=temp2))
coefd <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union+zg1+zg2+zg3+zg4+london,data=temp1))

## create list of initial values
init <- list(
	list(z=z, gamma=.224, scale=1.156, sigma=.941, beta=coefb, delta=coefd, cut.delta=0.81272,
		 "RNG.seed"=12345),
	list(z=z, gamma=.224, scale=1.156, sigma=.941, beta=coefb, delta=coefd, cut.delta=0.81272,
		"RNG.seed"=54321))

## set up model
m1 <- jags.model(file="model1.bug", data=jags.data, n.chains=2, n.adapt=2e3, init=init)
update(m1, 2e4)

## sample...
mon <- c("alpha", "beta", "gamma", "delta", "sigma", "scale","cut[2]", 
	"ARerr", "TMatch", "ppp.obs", "ppp.pred")
m1.out <- coda.samples(m1, variable.names=mon, n.iter=2e4*25, thin=25)

## save samples
save(m1.out, file="model1_samples.Rdata")




# ====================================================================
# =                       Analyses                                   =
# ====================================================================

## Convergence diagnostics

## Gelman-Rubin diag (excludes post. pred. check values)
gelman.diag(mcmc.list(m1.out[[1]][,c(4:43,50,51)], m1.out[[2]][,c(4:43,50,51)]))
## combine samples from 2 chains
m1.out <- runjags::combine.mcmc(list(m1.out[[1]],m1.out[[2]]))
## Geweke diagnostics
geweke.diag(m1.out)


# ==============================================
# =     Posterior summary   
# ==============================================

sum.coda(m1.out)

# Individual effect variance (samples are for SD)
sum.coda(as.matrix(m1.out[,51]^2))

# Individual effect variance share
sum.coda(as.matrix(m1.out[,"sigma"]^2/(1+m1.out[,"sigma"]^2)))


# ==============================================
# =    Posterior predictive checks
# ==============================================
par(mgp=c(0, 0.45, 0), mar=c(2.6, 0.1, 2.1, 0.1), mfrow=c(1,3))

hist(m1.out[,"ppp.pred[1]"], breaks=30, freq=FALSE, col="gray70", border="white", axes=FALSE, main="", ylab="", xlab="")
abline(v=mean(m1.out[,"ppp.obs[1]"]), col="red", lwd=2.2)
axis(1, tcl=-.25, cex.axis=1.05)
mtext("Preference -", side=3, line=.7, cex=.9, font=2)
mean(m1.out[,"ppp.pred[1]"]>m1.out[,"ppp.obs[1]"])

hist(m1.out[,"ppp.pred[2]"], breaks=30, freq=FALSE, col="gray70", border="white", axes=FALSE, main="", ylab="", xlab="", family="bold")
abline(v=mean(m1.out[,"ppp.obs[2]"]), col="red", lwd=2.2)
axis(1, tcl=-.25, cex.axis=1.05)
mtext("Preference =", side=3, line=.7, cex=.9, font=2)
mean(m1.out[,"ppp.pred[2]"]>m1.out[,"ppp.obs[2]"])

hist(m1.out[,"ppp.pred[3]"], breaks=30, freq=FALSE, col="gray70", border="white", axes=FALSE, main="", ylab="", xlab="")
abline(v=mean(m1.out[,"ppp.obs[3]"]), col="red", lwd=2.2)
mtext("Preference +", side=3, line=.7, cex=.9, font=2)
axis(1, tcl=-.25, cex.axis=1.05)
mean(m1.out[,"ppp.pred[3]"]>m1.out[,"ppp.obs[3]"])



# ==============================================
# =         Long-term effects    
# ==============================================

# steady state effects (y* metric):

# sample 5000 values from posterior
m1.coef.samp <- apply(m1.out[,5:20], 2, sample, 5000)
m1.gamma.samp <- sample(m1.out[,"gamma"], 5000)
m1.steady.state <- m1.coef.samp / (1-m1.gamma.samp)
sum.coda(m1.steady.state)


# steady state effects (P(y=highest category))

# sample of 5000 values from posterior
m1.coef.samp2 <- apply(m1.out, 2, sample, 5000)

# Function to calculate predicted probabilities
# Function inputs:
# sims: matrix of simulated coef values
# xvals vector of beta's X-values
# x-values order in model:
# age nkids hhsi own equi incshr perm trans div uempl fem nw
# degree alvl olvl union
# remember: all inputs have mean 0, continous var have sd=0.5

predict <- function(sims, xvals) {
	alpha <- sims[,"alpha"]
	gamma <- sims[,"gamma"]
	cut <- sims[,"cut[2]"]
	beta.mat <- sims[,5:20]
	beta.mat.scaled <- beta.mat/(1-gamma)
	linpred <- alpha + beta.mat.scaled%*%xvals
	p1 <- pnorm(0 - linpred)
	p2 <- pnorm(cut - linpred) - (pnorm(0 - linpred))
	p3 <- 1 - pnorm(cut - linpred)
	return(cbind(p1,p2,p3))
}

# calculate 1 sd change for continuous vars
# output: mean,sd,low,high hpd for cat.1 and cat.3
diffs <- function(values, sims) {
	out <- matrix(NA, ncol=8, nrow=length(values))
	j <- 1
	for (i in values){
		xvals  <- rep(0,16)
		xvals[i] <- -0.5
		p0 <- predict(sims, xvals)
		xvals[i] <- 0.5
		p1 <- predict(sims, xvals)
		out[j,1:4] <- sum.coda(as.matrix(p1[,1]-p0[,1]))
		out[j,5:8] <- sum.coda(as.matrix(p1[,3]-p0[,3]))
		j <- j+1
	}
	return(out)
}
# Age, hs equi, incshr, perminc
round(diffs(c(1,2,3,5,6,7,8), m1.coef.samp2),3)


# Calculations for discrete vars
# (Values for covariates are after centering)

# Union
xvals  <- rep(0,16)
xvals[16] <- -0.231313578062804
p0 <- predict(m1.coef.samp2, xvals)
xvals[16] <- 0.768686421937196
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Female
xvals  <- rep(0,16)
xvals[11] <- -0.514705882352941
p0 <- predict(m1.coef.samp2, xvals)
xvals[11] <- 0.485294117647059
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Nonwhite
xvals  <- rep(0,16)
xvals[12] <- -0.0340557275541796
p0 <- predict(m1.coef.samp2, xvals)
xvals[12] <- 0.96594427244582
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Degree
xvals  <- rep(0,16)
xvals[13] <- -0.196594427244582
p0 <- predict(m1.coef.samp2, xvals)
xvals[13] <- 0.803405572755418
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Alevel
xvals  <- rep(0,16)
xvals[14] <- -0.190402476780186
p0 <- predict(m1.coef.samp2, xvals)
xvals[14] <- 0.809597523219814
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Olevel
xvals  <- rep(0,16)
xvals[15] <- -0.419504643962848
p0 <- predict(m1.coef.samp2, xvals)
xvals[15] <- 0.580495356037152
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# H owner
xvals  <- rep(0,16)
xvals[4] <- -1.03954957105278
p0 <- predict(m1.coef.samp2, xvals)
xvals[4] <- 0.240462181241481
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Divorced
xvals  <- rep(0,16)
xvals[9] <- -0.0568332596196373 
p0 <- predict(m1.coef.samp2, xvals)
xvals[9] <- 0.943166740380363
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Unemployed
xvals  <- rep(0,16)
xvals[10] <- -0.0339451570101725
p0 <- predict(m1.coef.samp2, xvals)
xvals[10] <- 0.966054842989827
p1 <- predict(m1.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))




















